

####################################################
####          Replication Code for              ####
####     Bayesian versus maximum likelihood     ####
####      estimation of treatment effects       ####
####          in bivariate probit               ####  
####       instrumentalvariable models          ####    
####################################################
####                                            ####  
####                4/ 18/2017                  ####         
####     this file runs rcode to create Table 4 ####
####      and Figure 3                          ####
####################################################





## ATE = pnorm(x*gamma2 + beta) - pnorm(x*gamma2) for all draws from the mcmc
perry.data = read_dta("perry.data.dta")
load("perry.fit.rda")
results.perry = extract(perry.fit)


#ATE.perry = pnorm(results.perry$beta[,2,]*1 + results.perry$beta2) - (pnorm(results.perry$beta[,2,]*1))
ATE.perry = ATE_fn(results.perry$beta[,2,], results.perry$beta2,rep(1,dim(perry.data)[1]))

## mean and sd
mean(ATE.perry)
sd(ATE.perry)

summary(perry.data)


## LATE
LATE.dist.perry = rep(NA,4000)
for(i in 1:4000){
  LATE.dist.perry[i] = LATE_fn(gamma1=results.perry$beta[i,1,],gamma2 = results.perry$beta[i,2,], pi = results.perry$pi[i],beta = results.perry$beta2[i],x_i = rep(1,dim(perry.data)[1]) ,Omega = results.perry$Omega[,1,][i,2])
}
mean(LATE.dist.perry)
sd(LATE.dist.perry)



## TOT
## inefficient loop for TOT
TOT.dist.perry = rep(NA,4000)

for(mcmc_draw in 1:4000){
    mean11 = rep(1,length(perry.data$treatment))*results.perry$beta[mcmc_draw,1,] + perry.data$treatment*results.perry$pi[mcmc_draw]
    mean12 = rep(1,length(perry.data$treatment))*results.perry$beta[mcmc_draw,2,] + results.perry$beta2[mcmc_draw]
    mean21 = rep(1,length(perry.data$treatment))*results.perry$beta[mcmc_draw,1,] + perry.data$treatment*results.perry$pi[mcmc_draw]
    mean22 = rep(1,length(perry.data$treatment))*results.perry$beta[mcmc_draw,2,] 
    top =  pbivnorm(mean11,mean12, results.perry$Omega[,1,][mcmc_draw,2]) -  pbivnorm(mean21,mean22, results.perry$Omega[,1,][mcmc_draw,2])
    bot = pnorm(rep(1,length(perry.data$treatment))*results.perry$beta[mcmc_draw,1,] + perry.data$treatment*results.perry$pi[mcmc_draw])
    TOT = top/bot
    TOT.dist.perry[mcmc_draw] = sum(TOT)/length(bot)
}
## mean and sd
mean(TOT.dist.perry)
sd(TOT.dist.perry)




################# IHAD
ihad.data = read_dta("ihad.data.dta")
load("ihad.fit.rda")



results.ihad = extract(ihad.fit)
ATE.ihad = ATE_fn(results.ihad$beta[,2,], results.ihad$beta2,rep(1,dim(ihad.data)[1]))

                                        # mean and sd
mean(ATE.ihad)
sd(ATE.ihad)

## LATE
LATE.dist.ihad = rep(NA,4000)

for(i in 1:4000){
  LATE.dist.ihad[i] = LATE_fn(gamma1=results.ihad$beta[i,1,],gamma2 = results.ihad$beta[i,2,], pi = results.ihad$pi[i],beta = results.ihad$beta2[i],x_i = rep(1,dim(ihad.data)[1]) ,Omega = results.ihad$Omega[,1,][i,2])
}




## mean and sd
mean(LATE.dist.ihad)
sd(LATE.dist.ihad)

## TOT
## inefficient loop for TOT
TOT.dist.ihad = rep(NA,4000)

for(mcmc_draw in 1:4000){
    mean11 = rep(1,length(ihad.data$treatment))*results.ihad$beta[mcmc_draw,1,] + ihad.data$treatment*results.ihad$pi[mcmc_draw]
    mean12 = rep(1,length(ihad.data$treatment))*results.ihad$beta[mcmc_draw,2,] + results.ihad$beta2[mcmc_draw]
    mean21 = rep(1,length(ihad.data$treatment))*results.ihad$beta[mcmc_draw,1,] + ihad.data$treatment*results.ihad$pi[mcmc_draw]
    mean22 = rep(1,length(ihad.data$treatment))*results.ihad$beta[mcmc_draw,2,] 
    top =  pbivnorm(mean11,mean12, results.ihad$Omega[,1,][mcmc_draw,2]) -  pbivnorm(mean21,mean22, results.ihad$Omega[,1,][mcmc_draw,2])
    bot = pnorm(rep(1,length(ihad.data$treatment))*results.ihad$beta[mcmc_draw,1,] + ihad.data$treatment*results.ihad$pi[mcmc_draw])
    TOT = top/bot
    TOT.dist.ihad[mcmc_draw] = sum(TOT)/length(bot)
}
## mean and sd
mean(TOT.dist.ihad)
sd(TOT.dist.ihad)


################# STAR
star.data = read_dta("star.data.dta")
load("star.fit.rda")

results.star = extract(star.fit)


#ATE.star = pnorm(results.star$beta[,2,]*1 + results.star$beta2) - (pnorm(results.star$beta[,2,]*1))
ATE.star = ATE_fn(results.star$beta[,2,], results.star$beta2,rep(1,dim(star.data)[1]))

## mean and sd
mean(ATE.star)
sd(ATE.star)

## LATE
LATE.dist.star = rep(NA,4000)

for(i in 1:4000){
  LATE.dist.star[i] = LATE_fn(gamma1=results.star$beta[i,1,],gamma2 = results.star$beta[i,2,], pi = results.star$pi[i],beta = results.star$beta2[i],x_i = rep(1,dim(star.data)[1]) ,Omega = results.star$Omega[,1,][i,2])
}
## mean and sd
mean(LATE.dist.star)
sd(LATE.dist.star)


## TOT
## inefficient loop for TOT
TOT.dist.star = rep(NA,4000)

for(mcmc_draw in 1:4000){
    mean11 = rep(1,length(star.data$treatment))*results.star$beta[mcmc_draw,1,] + star.data$treatment*results.star$pi[mcmc_draw]
    mean12 = rep(1,length(star.data$treatment))*results.star$beta[mcmc_draw,2,] + results.star$beta2[mcmc_draw]
    mean21 = rep(1,length(star.data$treatment))*results.star$beta[mcmc_draw,1,] + star.data$treatment*results.star$pi[mcmc_draw]
    mean22 = rep(1,length(star.data$treatment))*results.star$beta[mcmc_draw,2,] 
    top =  pbivnorm(mean11,mean12, results.star$Omega[,1,][mcmc_draw,2]) -  pbivnorm(mean21,mean22, results.star$Omega[,1,][mcmc_draw,2])
    bot = pnorm(rep(1,length(star.data$treatment))*results.star$beta[mcmc_draw,1,] + star.data$treatment*results.star$pi[mcmc_draw])
    TOT = top/bot
    TOT.dist.star[mcmc_draw] = sum(TOT)/length(bot)
}
## mean and sd
mean(TOT.dist.star)
sd(TOT.dist.star)



#### TABLE 2
perry = c(100*apply(perry.data[perry.data$treatment == 1, c(1,2)],2, mean), dim(perry.data[perry.data$treatment == 1, c(1,2)])[1], 100*apply(perry.data[perry.data$treatment == 0, c(1,2)],2, mean), dim(perry.data[perry.data$treatment == 0, c(1,2)])[1] )
ihad = c(100*apply(ihad.data[ihad.data$treatment == 1, c(1,2)],2, mean), dim(ihad.data[ihad.data$treatment == 1, c(1,2)])[1], 100*apply(ihad.data[ihad.data$treatment == 0, c(1,2)],2, mean), dim(ihad.data[ihad.data$treatment == 0, c(1,2)])[1] )
star = c(100*apply(star.data[star.data$treatment == 1, c(1,2)],2, mean), dim(star.data[star.data$treatment == 1, c(1,2)])[1], 100*apply(star.data[star.data$treatment == 0, c(1,2)],2, mean), dim(star.data[star.data$treatment == 0, c(1,2)])[1] )

forTable3 = data.frame(perry, ihad, star)

## final editing has to be done to produce exact replicate of table design in paper
print(xtable(forTable3, caption = "Description of educational experiment data from Sondheimer and Green (2010)", label = "ApplicationTab3", align = "lccc"), include.colnames = FALSE,  include.rownames=FALSE)


####### pooled estimates
### ate and late are the same for each individual, since we don't have covariates other than intercept
### thus, only need reweighting of each by
set.seed(12345)
pooled.rho = ((123/985)*results.perry$Omega[,1,][,2]) + ((51/985)*results.ihad$Omega[,1,][,2]) + ((811/985)*results.star$Omega[,1,][,2]) 
mean(pooled.rho)
sd(pooled.rho)
set.seed = 12345
pooled.ATE = ((123/985)*ATE.perry) + ((51/985)*ATE.ihad) + ((811/985)*ATE.star) 
pooled.LATE = ((123/985)*LATE.dist.perry) + ((51/985)*LATE.dist.ihad) + ((811/985)*LATE.dist.star)
pooled.TOT =  ((123/985)*TOT.dist.perry) + ((51/985)*TOT.dist.ihad) + ((811/985)*TOT.dist.star)

mean(pooled.ATE)
sd(pooled.ATE)
mean(pooled.TOT)
sd(pooled.TOT)
mean(pooled.LATE)
sd(pooled.LATE)

### p(ate>0)
###
ate_smaller0 = ifelse(pooled.ATE <= 0, 1, 0)
mean(ate_smaller0)

###
att_smaller0 = ifelse(pooled.TOT <= 0, 1, 0)
mean(att_smaller0)



### plots
colors <- brewer.pal(8, "Set2")


#### ATE
pdf("Figure_3a.pdf")
#postscript("ATE_Final.ps")
par(mar=c(2,4,2,1))
plot(density(ATE.perry),col=colors[1],lwd=4,lty=1,xlim=c(-0.8,0.8),ylim=c(0,3.5)
	,axes=TRUE,xlab="",main="Average Treatment Effect")
#abline(v= 0.1566,lwd=3,lty=1,col=this[1])
par(new=T)
plot(density(ATE.ihad),col=colors[2],lwd=4,lty=2,xlim=c(-0.8,0.8),ylim=c(0,3.5)
	,axes=FALSE,xlab="",ylab="",main="")
#abline(v=0.05639 ,lwd=3,lty=2,col=this[2])
par(new=T)
plot(density(ATE.star),col=colors[3],lwd=4,lty=3,xlim=c(-0.8,0.8),ylim=c(0,3.5)
	,axes=FALSE,xlab="",ylab="",main="")
#abline(v=0.3490,lwd=3,lty=3,col=this[3])
par(new=T)
plot(density(pooled.ATE),col=colors[4],lwd=4,lty=5,xlim=c(-0.8,0.8),ylim=c(0,3.5)
	,axes=FALSE,xlab="",ylab="",main="")
legend("topright",bty="n", legend=c("Perry","IHAD","STAR","Pooled"), 
	lty=c(1,2,3,5),col=c(colors[1],colors[2],colors[3],colors[4]),lwd=2,cex=1)
  dev.off()

### TOT
pdf("Figure_3b.pdf")
#postscript("TOT_Final.ps")
par(mar=c(2,4,2,1))
plot(density(TOT.dist.perry),col=colors[1],lwd=4,lty=1,xlim=c(-0.9,0.8),ylim=c(0,4.5)
	,axes=TRUE,xlab="",main="Effect of Treatment on the Treated")
#abline(v= 0.1566,lwd=2,lty=1,col=this[1])
par(new=T)
plot(density(TOT.dist.ihad),col=colors[2],lwd=4,lty=2,xlim=c(-0.9,0.8),ylim=c(0,4.5)
	,axes=FALSE,xlab="",ylab="",main="")
#abline(v=0.05639 ,lwd=2,lty=2,col=this[2])
par(new=T)
plot(density(TOT.dist.star),col=colors[3],lwd=4,lty=3,xlim=c(-0.9,0.8),ylim=c(0,4.5)
	,axes=FALSE,xlab="",ylab="",main="")
#abline(v=0.3490,lwd=2,lty=3,col=this[3])
par(new=T)
plot(density(pooled.TOT),col=colors[4],lwd=4,lty=5,xlim=c(-0.9,0.8),ylim=c(0,4.5)
	,axes=FALSE,xlab="",ylab="",main="")
#abline(v=0.3490,lwd=2,lty=3,col=this[3])
legend("topright",bty="n", legend=c("Perry","IHAD","STAR","Pooled"), 
	lty=c(1,2,3,5),col=c(colors[1],colors[2],colors[3],colors[4]),lwd=4,cex=1)
dev.off()



#### combined coefs and sd for bayes 
sumVar = ((summary(perry.fit)[[1]]["beta2","sd"]^2) + (summary(ihad.fit)[[1]]["beta2","sd"]^2)+(summary(perry.fit)[[1]]["beta2","sd"]^2))

wPerry = (summary(perry.fit)[[1]]["beta2","sd"]^2 )/sumVar
Perry_coef =  summary(perry.fit)[[1]]["beta2","mean"] 

wIhad = (summary(ihad.fit)[[1]]["beta2","sd"]^2)/sumVar
ihad_coef = summary(ihad.fit)[[1]]["beta2","mean"]

wStar = (summary(star.fit)[[1]]["beta2","sd"]^2)/sumVar
star_coef = summary(star.fit)[[1]]["beta2","mean"]

#weights sum to one:
sum(c(wStar, wIhad, wPerry))

### combined coef:
combined.Bayes.coef = ((star_coef*wStar) + (ihad_coef*wIhad)  +  (Perry_coef*wPerry))

## combined se:
combined.Bayes.se = sqrt(sumVar)/3


### create Table 3 in Manuscript
perry.ml = c(round(biprobit[1,"perry"],2),paste("(",round(biprobit[2,"perry"],2),")"), round(biprobit[3,"perry"],2),paste("(",round(biprobit[4,"perry"],2),")"), round(biprobit[5,"perry"],2),paste("(",round(biprobit[6,"perry"],2),")"), round(biprobit[7,"perry"],2),paste("(",round(biprobit[8,"perry"],2),")"), 123, NA, NA)

perry.bayes = c(round(summary(perry.fit)[[1]]["beta[1,1]","mean"],2), paste("(",round(summary(perry.fit)[[1]]["beta[1,1]","sd"],2), ")"), round(summary(perry.fit)[[1]]["pi","mean"],2), paste("(",round(summary(perry.fit)[[1]]["pi","sd"],2), ")"),round(summary(perry.fit)[[1]]["beta[2,1]","mean"],2), paste("(",round(summary(perry.fit)[[1]]["beta[2,1]","sd"],2), ")"),round(summary(perry.fit)[[1]]["beta2","mean"],2), paste("(",round(summary(perry.fit)[[1]]["beta2","sd"],2), ")"), 123, round(summary(perry.fit)[[1]]["Omega[1,2]","mean"],2), paste("(",round(summary(perry.fit)[[1]]["Omega[1,2]","sd"],2), ")") )

ihad.ml = c(round(biprobit[1,"ihad"],2),paste("(",round(biprobit[2,"ihad"],2),")"), round(biprobit[3,"ihad"],2),paste("(",round(biprobit[4,"ihad"],2),")"), round(biprobit[5,"ihad"],2),paste("(",round(biprobit[6,"ihad"],2),")"), round(biprobit[7,"ihad"],2),paste("(",round(biprobit[8,"ihad"],2),")"), 58, NA, NA)

ihad.bayes = c(round(summary(ihad.fit)[[1]]["beta[1,1]","mean"],2), paste("(",round(summary(ihad.fit)[[1]]["beta[1,1]","sd"],2), ")"), round(summary(ihad.fit)[[1]]["pi","mean"],2), paste("(",round(summary(ihad.fit)[[1]]["pi","sd"],2), ")"),round(summary(ihad.fit)[[1]]["beta[2,1]","mean"],2), paste("(",round(summary(ihad.fit)[[1]]["beta[2,1]","sd"],2), ")"),round(summary(ihad.fit)[[1]]["beta2","mean"],2), paste("(",round(summary(ihad.fit)[[1]]["beta2","sd"],2), ")"), 123, round(summary(ihad.fit)[[1]]["Omega[1,2]","mean"],2), paste("(",round(summary(ihad.fit)[[1]]["Omega[1,2]","sd"],2), ")") )

star.ml = c(round(biprobit[1,"star"],2),paste("(",round(biprobit[2,"star"],2),")"), round(biprobit[3,"star"],2),paste("(",round(biprobit[4,"star"],2),")"), round(biprobit[5,"star"],2),paste("(",round(biprobit[6,"star"],2),")"), round(biprobit[7,"star"],2),paste("(",round(biprobit[8,"star"],2),")"), 811, NA, NA)

star.bayes = c(round(summary(star.fit)[[1]]["beta[1,1]","mean"],2), paste("(",round(summary(star.fit)[[1]]["beta[1,1]","sd"],2), ")"), round(summary(star.fit)[[1]]["pi","mean"],2), paste("(",round(summary(star.fit)[[1]]["pi","sd"],2), ")"),round(summary(star.fit)[[1]]["beta[2,1]","mean"],2), paste("(",round(summary(star.fit)[[1]]["beta[2,1]","sd"],2), ")"),round(summary(star.fit)[[1]]["beta2","mean"],2), paste("(",round(summary(star.fit)[[1]]["beta2","sd"],2), ")"), 123, round(summary(star.fit)[[1]]["Omega[1,2]","mean"],2), paste("(",round(summary(star.fit)[[1]]["Omega[1,2]","sd"],2), ")") )

combined.ml <- c(rep(NA, 6), round(combined_coef,2), paste("(",round( combined_SE,2), ")"), 992, NA, NA)
combined.bayes <- c(rep(NA, 6), round(combined.Bayes.coef,2), paste("(", round(combined.Bayes.se,2), ")"), 992, NA, NA)

forTable4 = data.frame(perry.ml, perry.bayes, ihad.ml, ihad.bayes, star.ml, star.bayes, combined.ml, combined.bayes)

### TABLE 3, requires final editing in latex, such as column names, row names etc
print(xtable(forTable4, caption = "Bivariate probit regression and Bayesian IV results for the downstream effects of educationalattainment on turnout", label = "ApplicationTab4", align = "lcccccccc"), include.colnames = FALSE,  include.rownames=FALSE)
